Data import

Import the data into a list via a call to the function import_csv_data().

# Load the CSV files found under each directory via the project helper
# import_csv_data(). The results are accessed below as named lists of data
# frames (e.g. data$olist_order_items_dataset).
data <- import_csv_data(path = "Olist_data/")
data_marketing <- import_csv_data(path = "Olist_Marketing_data/")

Overview of data

Source https://www.kaggle.com/jungjoonlee/eda-with-ecommerce-marketplace-seller-side

A Marketing Qualified Lead (MQL) is a potential reseller/manufacturer who is interested in selling their products on Olist. After an MQL fills in a form on a landing page to sign up as a seller, a Sales Development Representative (SDR) contacts the MQL and gathers more information about the lead. Then a Sales Representative (SR) consults the MQL. The interaction between SDRs/SRs and MQLs can therefore affect the conversion from MQL to seller. An MQL who finally signs up as a seller is called a closed deal.

Closed Deals

# Per-column summary (type, missing values, distribution) of the closed deals table.
skim(data_marketing$olist_closed_deals_dataset)
Data summary
Name data_marketing$olist_clos…
Number of rows 842
Number of columns 14
_______________________
Column type frequency:
factor 12
numeric 2
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
mql_id 0 1 FALSE 842 000: 1, 009: 1, 00d: 1, 010: 1
seller_id 0 1 FALSE 842 000: 1, 012: 1, 013: 1, 01f: 1
sdr_id 0 1 FALSE 32 4b3: 140, 068: 81, 56b: 74, 9d1: 66
sr_id 0 1 FALSE 22 4ef: 133, d3d: 82, 656: 74, 85f: 64
won_date 0 1 FALSE 824 201: 6, 201: 4, 201: 3, 201: 3
business_segment 0 1 FALSE 34 hom: 105, hea: 93, car: 77, hou: 71
lead_type 0 1 FALSE 9 onl: 332, onl: 126, ind: 123, off: 104
lead_behaviour_profile 0 1 FALSE 10 cat: 407, emp: 177, eag: 123, wol: 95
has_company 0 1 FALSE 3 emp: 779, Tru: 58, Fal: 5
has_gtin 0 1 FALSE 3 emp: 778, Tru: 54, Fal: 10
average_stock 0 1 FALSE 7 emp: 776, 5-2: 22, 50-: 15, 1-5: 10
business_type 0 1 FALSE 4 res: 587, man: 242, emp: 10, oth: 3

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
declared_product_catalog_size 773 0.08 233.03 352.38 1 30 100 300 2e+03 ▇▁▁▁▁
declared_monthly_revenue 0 1.00 73377.68 1744799.18 0 0 0 0 5e+07 ▇▁▁▁▁
# Show the first rows of the closed deals table as a horizontally scrollable
# HTML table without row names.
datatable(
  head(data_marketing$olist_closed_deals_dataset),
  rownames = FALSE,
  options = list(scrollX = TRUE)
)

MQL

# Per-column summary (type, missing values, distribution) of the MQL table.
skim(data_marketing$olist_marketing_qualified_leads_dataset)
Data summary
Name data_marketing$olist_mark…
Number of rows 8000
Number of columns 4
_______________________
Column type frequency:
factor 4
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
mql_id 0 1 FALSE 8000 000: 1, 000: 1, 001: 1, 002: 1
first_contact_date 0 1 FALSE 336 201: 93, 201: 76, 201: 75, 201: 71
landing_page_id 0 1 FALSE 495 b76: 912, 22c: 883, 583: 495, 887: 445
origin 0 1 FALSE 11 org: 2296, pai: 1586, soc: 1350, unk: 1099
# Show the first rows of the MQL table as a horizontally scrollable HTML table
# without row names.
datatable(
  head(data_marketing$olist_marketing_qualified_leads_dataset),
  rownames = FALSE,
  options = list(scrollX = TRUE)
)

EDA Closed Deals

Data wrangling

# Attach the closed-deal details to every marketing qualified lead. The right
# join keeps all rows of the MQL table; leads without a closed deal get NA in
# the closed-deal columns (e.g. seller_id).
df_deals <- right_join(
  data_marketing$olist_closed_deals_dataset,
  data_marketing$olist_marketing_qualified_leads_dataset,
  by = "mql_id"
)

# Create a monthly data frame with the number of leads and closed deals per
# origin, keyed by the month of first contact (formatted as "YYYYMM").
df_deals_monthly <- df_deals %>%
  mutate(
    first_contact_month = format(as_date(first_contact_date), "%Y%m"),
    # a lead counts as closed when the join found a seller_id for it
    deal_closed = if_else(!is.na(seller_id), 1, 0),
    # empty origins are reported as "unknown"
    origin = as.character(origin),
    origin = if_else(origin == "", "unknown", origin)
  ) %>%
  group_by(origin, first_contact_month) %>%
  summarize(
    leads = n(),
    closed_deals = sum(deal_closed)
  ) %>%
  ungroup()

Analysis Leads

Leads Overall

# Line chart: total number of leads per first-contact month (all origins).
monthly_leads_aes <- NULL  # placeholder removed below; keeps the pipeline self-contained
rm(monthly_leads_aes)
plot_leads <- df_deals_monthly %>%
  group_by(first_contact_month) %>%
  summarize(leads = sum(leads)) %>%
  ggplot(
    mapping = aes(x = first_contact_month, y = leads, group = 1)  # group = 1: one line across the discrete months
  ) +
  geom_line() +
  geom_point() +
  labs(x = "", y = "Number of Leads", colour = "origin") +
  theme_bw()

# Interactive version of the chart.
ggplotly(plot_leads)
# plot_leads  # static alternative

Leads by Origin

# Line chart: number of leads per first-contact month, one line per origin.
#
# The data stays in long format and `origin` is mapped to colour, so every
# origin present in the data gets a line without naming the origins by hand.
# tidyr::complete() adds the origin/month combinations that have no rows and
# sets their lead count to 0 (the same zero-filling the former
# pivot_wider() + replace(is.na(.), 0) step performed).
plot_leads_origin <- df_deals_monthly %>%
  select(-closed_deals) %>%
  tidyr::complete(origin, first_contact_month, fill = list(leads = 0)) %>%
  ggplot(aes(
    x = first_contact_month,
    y = leads,
    colour = origin,
    group = origin  # first_contact_month is discrete, so lines need an explicit group
  )) +
  geom_line() +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Leads", colour = "origin")

# Interactive version of the chart.
ggplotly(plot_leads_origin)
# plot_leads_origin  # static alternative
# Bar chart: total number of leads per origin, largest first, with the count
# printed above each bar.
plot_bar_leads_origin <- df_deals_monthly %>%
  group_by(origin) %>%
  summarize(leads = sum(leads)) %>%
  ggplot(aes(x = reorder(origin, -leads), y = leads)) +
  geom_col(aes(fill = origin)) +
  # label with the data column directly instead of the deprecated `..y..`
  geom_text(aes(label = leads), vjust = -1) +
  theme_bw() +
  labs(x = "", y = "Number of Leads", fill = "origin") +
  # Keep head-room up to 2350 for the labels above the bars. Unlike hard scale
  # limits, expand_limits() does not drop bars that exceed this value.
  expand_limits(y = 2350)

# Interactive version of the chart.
ggplotly(plot_bar_leads_origin)
# plot_bar_leads_origin  # static alternative
# Bar chart: share of all leads per origin, largest first, with the percentage
# printed above each bar.
plot_bar_leads_origin_per <- df_deals_monthly %>%
  group_by(origin) %>%
  summarize(leads = sum(leads)) %>%
  # share of each origin in the total number of leads
  mutate(share = leads / sum(leads)) %>%
  ggplot(aes(x = reorder(origin, -share), y = share)) +
  geom_col(aes(fill = origin)) +
  # label with the data column directly instead of the deprecated `..y..`
  geom_text(aes(label = paste0(round(share * 100, 1), "%")), vjust = -1) +
  theme_bw() +
  # the axis shows percentages, not counts
  labs(x = "", y = "Share of Leads", fill = "origin") +
  scale_y_continuous(labels = scales::percent) +
  # Keep head-room up to 30% for the labels above the bars. Unlike hard scale
  # limits, expand_limits() does not drop bars that exceed this value.
  expand_limits(y = 0.3)

# Interactive version of the chart.
ggplotly(plot_bar_leads_origin_per)
# plot_bar_leads_origin_per  # static alternative

Analysis Closed Deals

Closed Deals Overall

# Line chart: total number of closed deals per first-contact month (all origins).
# NOTE: this reuses (overwrites) the object name `plot_leads` from the leads
# section; the name is kept so that references to it keep working.
plot_leads <- df_deals_monthly %>%
  group_by(first_contact_month) %>%
  # keep the column name `closed_deals` so the interactive tooltip is not
  # labelled "leads"
  summarize(closed_deals = sum(closed_deals)) %>%
  ggplot(aes(x = first_contact_month, y = closed_deals, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Closed Deals", colour = "origin")

# Interactive version of the chart.
ggplotly(plot_leads)
# plot_leads  # static alternative

Closed Deals by Origin

# Line chart: number of closed deals per first-contact month, one line per
# origin.
#
# The data stays in long format and `origin` is mapped to colour, so every
# origin present in the data gets a line without naming the origins by hand.
# tidyr::complete() adds the origin/month combinations that have no rows and
# sets their closed-deal count to 0 (the same zero-filling the former
# pivot_wider() + replace(is.na(.), 0) step performed).
# NOTE: this reuses (overwrites) the object name `plot_leads_origin` from the
# leads section; the name is kept so that references to it keep working.
plot_leads_origin <- df_deals_monthly %>%
  select(-leads) %>%
  tidyr::complete(origin, first_contact_month, fill = list(closed_deals = 0)) %>%
  ggplot(aes(
    x = first_contact_month,
    y = closed_deals,
    colour = origin,
    group = origin  # first_contact_month is discrete, so lines need an explicit group
  )) +
  geom_line() +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Closed Deals", colour = "origin")

# Interactive version of the chart.
ggplotly(plot_leads_origin)
# plot_leads_origin  # static alternative
# Bar chart: total number of closed deals per origin, largest first, with the
# count printed above each bar.
# NOTE: this reuses (overwrites) the object name `plot_bar_leads_origin` from
# the leads section; the name is kept so that references to it keep working.
plot_bar_leads_origin <- df_deals_monthly %>%
  group_by(origin) %>%
  # keep the column name `closed_deals` so the interactive tooltip is not
  # labelled "leads"
  summarize(closed_deals = sum(closed_deals)) %>%
  ggplot(aes(x = reorder(origin, -closed_deals), y = closed_deals)) +
  geom_col(aes(fill = origin)) +
  # label with the data column directly instead of the deprecated `..y..`
  geom_text(aes(label = closed_deals), vjust = -1) +
  theme_bw() +
  labs(x = "", y = "Number of Closed Deals", fill = "origin") +
  # Keep head-room up to 300 for the labels above the bars. Unlike hard scale
  # limits, expand_limits() does not drop bars that exceed this value.
  expand_limits(y = 300)

# Interactive version of the chart.
ggplotly(plot_bar_leads_origin)
# plot_bar_leads_origin  # static alternative
# Bar chart: share of all closed deals per origin, largest first, with the
# percentage printed above each bar.
# NOTE: this reuses (overwrites) the object name `plot_bar_leads_origin_per`
# from the leads section; the name is kept so that references keep working.
plot_bar_leads_origin_per <- df_deals_monthly %>%
  group_by(origin) %>%
  summarize(closed_deals = sum(closed_deals)) %>%
  # share of each origin in the total number of closed deals
  mutate(share = closed_deals / sum(closed_deals)) %>%
  ggplot(aes(x = reorder(origin, -share), y = share)) +
  geom_col(aes(fill = origin)) +
  # label with the data column directly instead of the deprecated `..y..`
  geom_text(aes(label = paste0(round(share * 100, 1), "%")), vjust = -1) +
  theme_bw() +
  # the axis shows percentages, not counts
  labs(x = "", y = "Share of Closed Deals", fill = "origin") +
  scale_y_continuous(labels = scales::percent) +
  # Keep head-room up to 35% for the labels above the bars. Unlike hard scale
  # limits, expand_limits() does not drop bars that exceed this value.
  expand_limits(y = 0.35)

# Interactive version of the chart.
ggplotly(plot_bar_leads_origin_per)
# plot_bar_leads_origin_per  # static alternative

Conversion Rate

Conversion Rate Overall

# Line chart: conversion rate (closed deals / leads) per first-contact month,
# aggregated over all origins.
plot_conversion <- df_deals_monthly %>%
  group_by(first_contact_month) %>%
  summarize(
    leads = sum(leads),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(conversion_rate = closed_deals / leads) %>%
  ggplot(
    mapping = aes(x = first_contact_month, y = conversion_rate, group = 1)  # group = 1: one line across the discrete months
  ) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "", y = "Conversion Rate", colour = "origin") +
  theme_bw()

# Interactive version of the chart.
ggplotly(plot_conversion)
# plot_conversion  # static alternative

Conversion Rate by Origin

2017

# Bar chart: conversion rate (closed deals / leads) per origin for leads whose
# first contact was in 2017, highest rate first, with the percentage printed
# above each bar.
plot_bar_conversion_origin_2017 <- df_deals_monthly %>%
  # first_contact_month is a "YYYYMM" string, so the year is its prefix
  filter(startsWith(first_contact_month, "2017")) %>%
  group_by(origin) %>%
  summarize(
    leads = sum(leads),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(conversion_rate = closed_deals / leads) %>%
  ggplot(aes(x = reorder(origin, -conversion_rate), y = conversion_rate)) +
  geom_col(aes(fill = origin)) +
  # label with the data column directly instead of the deprecated `..y..`
  geom_text(
    aes(label = paste0(round(conversion_rate * 100, 1), "%")),
    vjust = -1
  ) +
  theme_bw() +
  labs(x = "", y = "Conversion Rate", fill = "origin") +
  scale_y_continuous(labels = scales::percent) +
  # Keep head-room up to 20% for the labels above the bars. Unlike hard scale
  # limits, expand_limits() does not drop bars that exceed this value.
  expand_limits(y = 0.20)

# Interactive version of the chart.
ggplotly(plot_bar_conversion_origin_2017)
# plot_bar_conversion_origin_2017  # static alternative

2018

# Bar chart: conversion rate (closed deals / leads) per origin for leads whose
# first contact was in 2018, highest rate first, with the percentage printed
# above each bar.
plot_bar_conversion_origin_2018 <- df_deals_monthly %>%
  # first_contact_month is a "YYYYMM" string, so the year is its prefix
  filter(startsWith(first_contact_month, "2018")) %>%
  group_by(origin) %>%
  summarize(
    leads = sum(leads),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(conversion_rate = closed_deals / leads) %>%
  ggplot(aes(x = reorder(origin, -conversion_rate), y = conversion_rate)) +
  geom_col(aes(fill = origin)) +
  # label with the data column directly instead of the deprecated `..y..`
  geom_text(
    aes(label = paste0(round(conversion_rate * 100, 1), "%")),
    vjust = -1
  ) +
  theme_bw() +
  labs(x = "", y = "Conversion Rate", fill = "origin") +
  scale_y_continuous(labels = scales::percent) +
  # Keep head-room up to 30% for the labels above the bars. Unlike hard scale
  # limits, expand_limits() does not drop bars that exceed this value.
  expand_limits(y = 0.30)

# Interactive version of the chart.
ggplotly(plot_bar_conversion_origin_2018)
# plot_bar_conversion_origin_2018  # static alternative

Seller Characteristics

Data Wrangling

  • For 462 sellers with closed deals there is no corresponding purchase information (55% of all sellers).
  • There are empty ("") observations in business_segment, lead_type, business_type and origin.
# Add a numeric 0/1 flag marking leads that ended in a closed deal
# (i.e. the join found a seller_id for them).
df_deals <- mutate(
  df_deals,
  deal_closed = if_else(!is.na(seller_id), 1, 0)
)

# One row per seller with a closed deal, with the seller's attributes and the
# summed item prices from the order-items table.
# NOTE(review): the left join repeats a seller's row once per matching
# order-item row, so `closed_deals` is the number of joined rows (1 for sellers
# without any order items), not a count of deals. `revenue` is NA for sellers
# without order items because `price` is NA there.
df_seller <- df_deals %>%
  left_join(
    data$olist_order_items_dataset,
    by = "seller_id"
  ) %>%
  filter(!is.na(seller_id)) %>%
  group_by(
    seller_id,
    business_segment,
    lead_type,
    business_type,
    origin
  ) %>%
  summarize(
    closed_deals = sum(deal_closed),
    revenue = sum(price)
  ) %>%
  ungroup()

# Per-column summary of the seller table (note the missing values in revenue).
skim(df_seller)
Data summary
Name df_seller
Number of rows 842
Number of columns 7
_______________________
Column type frequency:
character 1
factor 4
numeric 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
seller_id 0 1 32 32 0 842 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
business_segment 0 1 FALSE 34 hom: 105, hea: 93, car: 77, hou: 71
lead_type 0 1 FALSE 9 onl: 332, onl: 126, ind: 123, off: 104
business_type 0 1 FALSE 4 res: 587, man: 242, emp: 10, oth: 3
origin 0 1 FALSE 11 org: 271, pai: 195, unk: 179, soc: 75

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
closed_deals 0 1.00 6.54 25.77 1.0 1.00 1.0 4 578 ▇▁▁▁▁
revenue 462 0.45 1781.19 6800.94 18.9 179.65 547.4 1286 113629 ▇▁▁▁▁

For a first analysis: drop all sellers with NA revenue and transform empty observations to “unknown”.

# Seller table used for the analysis: only sellers with known revenue, empty
# category values replaced by "unknown", plus revenue per order.
df_seller_man <- df_seller %>%
  filter(!is.na(revenue)) %>%
  # same replacement for all four categorical columns: "" -> "unknown"
  mutate_at(
    vars(business_segment, lead_type, business_type, origin),
    ~ if_else(. == "", "unknown", as.character(.))
  ) %>%
  mutate(Revenue_per_Order = revenue / closed_deals) %>%
  # turn every character column (including seller_id) into a factor
  mutate_if(is.character, as.factor)

# Per-column summary of the cleaned seller table.
skim(df_seller_man)
Data summary
Name df_seller_man
Number of rows 380
Number of columns 8
_______________________
Column type frequency:
factor 5
numeric 3
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
seller_id 0 1 FALSE 380 012: 1, 01f: 1, 02f: 1, 03a: 1
business_segment 0 1 FALSE 29 hea: 45, hom: 44, hou: 44, con: 32
lead_type 0 1 FALSE 8 onl: 172, onl: 79, ind: 41, off: 30
business_type 0 1 FALSE 3 res: 287, man: 90, unk: 3
origin 0 1 FALSE 9 org: 113, pai: 101, unk: 85, dir: 31

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
closed_deals 0 1 13.27 37.30 1.00 2.00 5.00 12.00 578 ▇▁▁▁▁
revenue 0 1 1781.19 6800.94 18.90 179.65 547.40 1286.00 113629 ▇▁▁▁▁
Revenue_per_Order 0 1 169.00 286.16 9.99 49.94 87.39 160.66 2749 ▇▁▁▁▁

Business Segment

# Revenue, order count and revenue per order aggregated by business segment,
# sorted by revenue per order (highest first). `segment_small` holds the first
# six characters of the segment name and is used as a short plot label.
df_business_segment <- df_seller_man %>%
  group_by(business_segment) %>%
  summarize(
    revenue = sum(revenue),
    closed_deals = sum(closed_deals)
  ) %>%
  ungroup() %>%
  mutate(
    Revenue_per_Order = revenue / closed_deals,
    segment_small = substr(business_segment, 1, 6)
  ) %>%
  arrange(desc(Revenue_per_Order))

# Show the aggregate as a horizontally scrollable HTML table without row names.
datatable(
  df_business_segment,
  rownames = FALSE,
  options = list(scrollX = TRUE)
)

Revenue per Order

# Bar chart: revenue per order by business segment, highest first.
df_business_segment %>%
  ggplot(
    aes(
      x = reorder(segment_small, -Revenue_per_Order),
      y = Revenue_per_Order
    )
  ) +
  geom_col() +
  labs(x = "Business segment", y = "Revenue per Order") +
  theme_bw()

Orders

# Bar chart: number of orders by business segment, highest first.
df_business_segment %>%
  ggplot(
    aes(
      x = reorder(segment_small, -closed_deals),
      y = closed_deals
    )
  ) +
  geom_col() +
  labs(x = "Business segment", y = "Number of Orders") +
  theme_bw()

Combined

# Scatter of text labels: number of orders vs. revenue per order, one
# (abbreviated) label per business segment.
df_business_segment %>%
  ggplot(aes(x = closed_deals, y = Revenue_per_Order)) +
  geom_text(aes(label = segment_small)) +
  labs(x = "Number of Orders", y = "Revenue per Order") +
  theme_bw()

Lead Type

# Revenue, order count and revenue per order aggregated by lead type, sorted by
# revenue per order (highest first). `lead_small` holds the first five
# characters of the lead type.
df_lead_type <- df_seller_man %>%
  group_by(lead_type) %>%
  summarize(
    revenue = sum(revenue),
    closed_deals = sum(closed_deals)
  ) %>%
  ungroup() %>%
  mutate(
    Revenue_per_Order = revenue / closed_deals,
    lead_small = substr(lead_type, 1, 5)
  ) %>%
  arrange(desc(Revenue_per_Order))

# Show the aggregate as a horizontally scrollable HTML table without row names.
datatable(
  df_lead_type,
  rownames = FALSE,
  options = list(scrollX = TRUE)
)

Revenue

# Bar chart: total revenue by lead type, highest first.
df_lead_type %>%
  ggplot(aes(x = reorder(lead_type, -revenue), y = revenue)) +
  geom_col() +
  labs(x = "Lead Type", y = "Revenue") +
  theme_bw()

Revenue per Order

# Bar chart: revenue per order by lead type, highest first.
df_lead_type %>%
  ggplot(
    aes(
      x = reorder(lead_type, -Revenue_per_Order),
      y = Revenue_per_Order
    )
  ) +
  geom_col() +
  labs(x = "Lead Type", y = "Revenue per Order") +
  theme_bw()

Orders

# Bar chart: number of orders by lead type, highest first.
df_lead_type %>%
  ggplot(aes(x = reorder(lead_type, -closed_deals), y = closed_deals)) +
  geom_col() +
  labs(x = "Lead Type", y = "Number of Orders") +
  theme_bw()

Combined

# Scatter of text labels: number of orders vs. revenue per order, one label per
# lead type.
df_lead_type %>%
  ggplot(aes(x = closed_deals, y = Revenue_per_Order)) +
  geom_text(aes(label = lead_type)) +
  labs(x = "Number of Orders", y = "Revenue per Order") +
  theme_bw()

Regression Analysis

# Re-level business_segment for better comparison: the level currently in
# second position becomes the reference category of the regression.
# NOTE(review): `ref = 2` is positional, so re-running this statement on the
# already re-levelled factor picks a different reference level - consider
# naming the intended level explicitly.
df_seller_man$business_segment <- relevel(
  df_seller_man$business_segment,
  ref = 2
)

# Linear model: number of orders per seller explained by the seller's
# categorical attributes.
lm_seller_deals <- lm(
  closed_deals ~ business_segment + lead_type + origin + business_type,
  data = df_seller_man
)

summary(lm_seller_deals)
## 
## Call:
## lm(formula = closed_deals ~ business_segment + lead_type + origin + 
##     business_type, data = df_seller_man)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -183.61   -9.44   -3.70    3.89  364.66 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                      -6.9247    10.6753  -0.649
## business_segmentair_conditioning                 -4.8921    25.2280  -0.194
## business_segmentbaby                             -2.8331    15.4100  -0.184
## business_segmentbags_backpacks                    6.9612    11.1987   0.622
## business_segmentbed_bath_table                   15.5402    12.7164   1.222
## business_segmentbooks                            16.9975    18.4511   0.921
## business_segmentcar_accessories                  -3.1090     8.8908  -0.350
## business_segmentcomputers                         2.8141    12.1129   0.232
## business_segmentconstruction_tools_house_garden   2.4132     8.7559   0.276
## business_segmentfashion_accessories               2.0389    15.3596   0.133
## business_segmentfood_drink                        3.5310    13.2767   0.266
## business_segmentfood_supplement                   7.8826    13.9343   0.566
## business_segmentgames_consoles                   -5.8770    35.0201  -0.168
## business_segmentgifts                             3.0202    21.0013   0.144
## business_segmenthandcrafted                       6.9934    22.5303   0.310
## business_segmenthealth_beauty                    13.1238     8.2159   1.597
## business_segmenthome_appliances                  21.0720    16.6052   1.269
## business_segmenthome_decor                        3.3444     8.2296   0.406
## business_segmenthome_office_furniture            31.2110    21.2495   1.469
## business_segmenthousehold_utilities               7.9587     8.1576   0.976
## business_segmentmusic_instruments                -4.4558    18.3112  -0.243
## business_segmentparty                            19.5763    35.9591   0.544
## business_segmentpet                              10.7230    10.7250   1.000
## business_segmentphone_mobile                     10.8452    17.5510   0.618
## business_segmentsmall_appliances                  0.2652    15.1925   0.017
## business_segmentsports_leisure                    4.5255    11.2215   0.403
## business_segmentstationery                        3.3163    14.6162   0.227
## business_segmenttoys                              7.1314    12.5913   0.566
## business_segmentwatches                         192.9446    20.9722   9.200
## lead_typeoffline                                -12.3781     9.2437  -1.339
## lead_typeonline_beginner                          1.6527     9.6251   0.172
## lead_typeonline_big                              14.6871     7.0341   2.088
## lead_typeonline_medium                            4.1717     6.2856   0.664
## lead_typeonline_small                             8.8368     8.8846   0.995
## lead_typeonline_top                               2.5438    15.4583   0.165
## lead_typeunknown                                  9.9978    21.6836   0.461
## origindisplay                                    -1.5010    25.4692  -0.059
## originemail                                      -9.4293    16.4924  -0.572
## originorganic_search                              3.9567     7.1893   0.550
## originother                                      32.0729    25.3783   1.264
## originpaid_search                                 2.7265     7.2631   0.375
## originreferral                                    1.5886    13.6553   0.116
## originsocial                                      4.5100     9.1208   0.494
## originunknown                                     6.9417     7.4945   0.926
## business_typereseller                             5.6883     4.6977   1.211
## business_typeunknown                             10.3336    24.2535   0.426
##                                                 Pr(>|t|)    
## (Intercept)                                       0.5170    
## business_segmentair_conditioning                  0.8464    
## business_segmentbaby                              0.8542    
## business_segmentbags_backpacks                    0.5346    
## business_segmentbed_bath_table                    0.2225    
## business_segmentbooks                             0.3576    
## business_segmentcar_accessories                   0.7268    
## business_segmentcomputers                         0.8164    
## business_segmentconstruction_tools_house_garden   0.7830    
## business_segmentfashion_accessories               0.8945    
## business_segmentfood_drink                        0.7904    
## business_segmentfood_supplement                   0.5720    
## business_segmentgames_consoles                    0.8668    
## business_segmentgifts                             0.8857    
## business_segmenthandcrafted                       0.7564    
## business_segmenthealth_beauty                     0.1111    
## business_segmenthome_appliances                   0.2053    
## business_segmenthome_decor                        0.6847    
## business_segmenthome_office_furniture             0.1428    
## business_segmenthousehold_utilities               0.3300    
## business_segmentmusic_instruments                 0.8079    
## business_segmentparty                             0.5865    
## business_segmentpet                               0.3181    
## business_segmentphone_mobile                      0.5370    
## business_segmentsmall_appliances                  0.9861    
## business_segmentsports_leisure                    0.6870    
## business_segmentstationery                        0.8206    
## business_segmenttoys                              0.5715    
## business_segmentwatches                           <2e-16 ***
## lead_typeoffline                                  0.1815    
## lead_typeonline_beginner                          0.8638    
## lead_typeonline_big                               0.0376 *  
## lead_typeonline_medium                            0.5073    
## lead_typeonline_small                             0.3206    
## lead_typeonline_top                               0.8694    
## lead_typeunknown                                  0.6450    
## origindisplay                                     0.9530    
## originemail                                       0.5679    
## originorganic_search                              0.5824    
## originother                                       0.2072    
## originpaid_search                                 0.7076    
## originreferral                                    0.9075    
## originsocial                                      0.6213    
## originunknown                                     0.3550    
## business_typereseller                             0.2268    
## business_typeunknown                              0.6703    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34.24 on 334 degrees of freedom
## Multiple R-squared:  0.2572, Adjusted R-squared:  0.1571 
## F-statistic:  2.57 on 45 and 334 DF,  p-value: 9.235e-07
# Linear model: revenue per order explained by the seller's categorical
# attributes (same predictors as the order-count model).
lm_seller_revenue <- lm(
  Revenue_per_Order ~ business_segment + lead_type + origin + business_type,
  data = df_seller_man
)

summary(lm_seller_revenue)
## 
## Call:
## lm(formula = Revenue_per_Order ~ business_segment + lead_type + 
##     origin + business_type, data = df_seller_man)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -951.67 -109.71  -45.14   39.05 2362.85 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                       120.51      86.13   1.399
## business_segmentair_conditioning                  933.63     203.54   4.587
## business_segmentbaby                               51.92     124.33   0.418
## business_segmentbags_backpacks                   -114.59      90.35  -1.268
## business_segmentbed_bath_table                   -159.26     102.60  -1.552
## business_segmentbooks                            -161.11     148.86  -1.082
## business_segmentcar_accessories                    77.48      71.73   1.080
## business_segmentcomputers                        -155.06      97.73  -1.587
## business_segmentconstruction_tools_house_garden    41.77      70.64   0.591
## business_segmentfashion_accessories              -128.99     123.92  -1.041
## business_segmentfood_drink                       -157.02     107.12  -1.466
## business_segmentfood_supplement                   -69.80     112.42  -0.621
## business_segmentgames_consoles                     46.97     282.54   0.166
## business_segmentgifts                             -33.16     169.44  -0.196
## business_segmenthandcrafted                      -206.50     181.77  -1.136
## business_segmenthealth_beauty                    -145.55      66.29  -2.196
## business_segmenthome_appliances                   -29.26     133.97  -0.218
## business_segmenthome_decor                        -96.08      66.40  -1.447
## business_segmenthome_office_furniture            -132.93     171.44  -0.775
## business_segmenthousehold_utilities               -66.24      65.82  -1.006
## business_segmentmusic_instruments                 121.77     147.73   0.824
## business_segmentparty                            -238.59     290.12  -0.822
## business_segmentpet                              -103.45      86.53  -1.196
## business_segmentphone_mobile                     -163.34     141.60  -1.154
## business_segmentsmall_appliances                   71.50     122.57   0.583
## business_segmentsports_leisure                    -66.21      90.54  -0.731
## business_segmentstationery                        -92.60     117.92  -0.785
## business_segmenttoys                             -131.81     101.59  -1.298
## business_segmentwatches                           -60.42     169.20  -0.357
## lead_typeoffline                                  163.53      74.58   2.193
## lead_typeonline_beginner                          140.91      77.66   1.815
## lead_typeonline_big                                73.37      56.75   1.293
## lead_typeonline_medium                             36.94      50.71   0.728
## lead_typeonline_small                              31.58      71.68   0.441
## lead_typeonline_top                               101.03     124.72   0.810
## lead_typeunknown                                  354.70     174.94   2.028
## origindisplay                                     -98.91     205.49  -0.481
## originemail                                       344.62     133.06   2.590
## originorganic_search                               62.01      58.00   1.069
## originother                                        82.27     204.75   0.402
## originpaid_search                                  84.20      58.60   1.437
## originreferral                                    128.47     110.17   1.166
## originsocial                                       25.50      73.59   0.347
## originunknown                                      34.33      60.47   0.568
## business_typereseller                             -14.76      37.90  -0.389
## business_typeunknown                               31.88     195.68   0.163
##                                                 Pr(>|t|)    
## (Intercept)                                       0.1627    
## business_segmentair_conditioning                6.37e-06 ***
## business_segmentbaby                              0.6765    
## business_segmentbags_backpacks                    0.2056    
## business_segmentbed_bath_table                    0.1215    
## business_segmentbooks                             0.2799    
## business_segmentcar_accessories                   0.2809    
## business_segmentcomputers                         0.1135    
## business_segmentconstruction_tools_house_garden   0.5547    
## business_segmentfashion_accessories               0.2987    
## business_segmentfood_drink                        0.1436    
## business_segmentfood_supplement                   0.5351    
## business_segmentgames_consoles                    0.8681    
## business_segmentgifts                             0.8450    
## business_segmenthandcrafted                       0.2568    
## business_segmenthealth_beauty                     0.0288 *  
## business_segmenthome_appliances                   0.8272    
## business_segmenthome_decor                        0.1488    
## business_segmenthome_office_furniture             0.4387    
## business_segmenthousehold_utilities               0.3149    
## business_segmentmusic_instruments                 0.4104    
## business_segmentparty                             0.4114    
## business_segmentpet                               0.2327    
## business_segmentphone_mobile                      0.2495    
## business_segmentsmall_appliances                  0.5601    
## business_segmentsports_leisure                    0.4651    
## business_segmentstationery                        0.4329    
## business_segmenttoys                              0.1954    
## business_segmentwatches                           0.7212    
## lead_typeoffline                                  0.0290 *  
## lead_typeonline_beginner                          0.0705 .  
## lead_typeonline_big                               0.1969    
## lead_typeonline_medium                            0.4669    
## lead_typeonline_small                             0.6598    
## lead_typeonline_top                               0.4185    
## lead_typeunknown                                  0.0434 *  
## origindisplay                                     0.6306    
## originemail                                       0.0100 *  
## originorganic_search                              0.2858    
## originother                                       0.6881    
## originpaid_search                                 0.1517    
## originreferral                                    0.2444    
## originsocial                                      0.7292    
## originunknown                                     0.5705    
## business_typereseller                             0.6972    
## business_typeunknown                              0.8707    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 276.3 on 334 degrees of freedom
## Multiple R-squared:  0.1786, Adjusted R-squared:  0.06795 
## F-statistic: 1.614 on 45 and 334 DF,  p-value: 0.01027